1 Data Preparation

ps <- read.csv(file.path(data_folder, ps_folder, "pupil_data_with_demo_20220926.csv"))

ps$demo_gender <- factor(ps$demo_gender, 
                         levels = c(1,2), 
                         labels = c("Male", "Female"))

ps$user_cat <- NA
ps$user_cat[ps$user_type == "non-user"] <- 0
ps$user_cat[ps$user_type == "occasional"] <- 1
ps$user_cat[ps$user_type == "daily"] <- 2

ps$user_cat <- factor(ps$user_cat, 
                      levels = c(0,1,2), 
                      labels = c("non-user", 
                                 "occasional", 
                                 "daily"))

ps$tp <- NA
ps$tp[ps$time == "pre2"] <- 0
ps$tp[ps$time == "post"] <- 1

ps$tp <- factor(ps$tp, 
                levels = c(0,1), 
                labels = c("pre2", "post"))
# subject level data 
pt.df <- unique(ps[, c("subject_id", "tp", "user_cat", "demo_age", 
                "demo_weight", "demo_height", "demo_gender", "thc")])

There are more subjects in total than by time point. Indicating incomplete data with some subjects missing “pre” and others missing “post” measurement.

Add scalar features for each participant-time point

scalar.feat <- read.csv(file.path(data_folder, ps_folder, "scalars_trim.csv"), 
                        header = T, stringsAsFactors = F)

scalar.feat$subject_id <- substr(scalar.feat$timeid, 1, 7)
scalar.feat$tp <- substr(scalar.feat$timeid, 8, 11)

scalar.featR <- scalar.feat[scalar.feat$eye == "Right", ]

pt.df <- merge(pt.df, scalar.featR[, c("subject_id", "tp", "min_constriction", 
                                       "duration", "avg_end_percent", "slope", 
                                       "AUC", "eye")], 
               by = c("subject_id", "tp"))

Reshape participant demog data to preserve pre and post THC levels and scalar features

pt.df.w <- reshape(pt.df, 
                   timevar = "tp", 
                   idvar = c("subject_id", "user_cat", "demo_age", 
                             "demo_weight", "demo_height", "demo_gender", "eye"), 
                   direction = "wide")
pt.df.w$thc.post[pt.df.w$user_cat == "non-user" & is.na(pt.df.w$thc.post)] <- 0
pt.df.w$thc_chg <- pt.df.w$thc.post - pt.df.w$thc.pre2
pt.df.w$BMI <- pt.df.w$demo_weight*703/(pt.df.w$demo_height)^2
pt.df.w$min_constriction_chg <- pt.df.w$min_constriction.post - pt.df.w$min_constriction.pre2
pt.df.w$AUC_chg <- pt.df.w$AUC.post - pt.df.w$AUC.pre2
pt.df.w$duration_chg <- pt.df.w$duration.post - pt.df.w$duration.pre2
pt.df.w$avg_end_percent_chg <- pt.df.w$avg_end_percent.post - pt.df.w$avg_end_percent.pre2
pt.df.w$slope_chg <-  pt.df.w$slope.post - pt.df.w$slope.pre2

1.1 Add time from smoking to post test

## Need to work on; time formatted in Excel file.
testTimes <- read.xlsx(file.path(data_folder, ps_folder, "All SafetyScan Times_23Aug2022_revSG.xlsx"), 
                       sheet = "Raw")

testTimes$pre_safetyscan_date_convert <- as.Date(testTimes$pre_safetyscan_date, 
                                                 origin = "1899-12-30")

testTimes$pre_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert,
                                                           " ", testTimes$pre_safetyscan_time_hr,
                                                           ":", testTimes$pre_safetyscan_time_min,
                                                           ":", "00"), 
                                                    format = "%Y-%m-%d %H:%M:%S")

testTimes$consump_start_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert,
                                                          " ", testTimes$consump_start_time_hr, 
                                                          ":", testTimes$consump_start_time_min,
                                                          ":", "00"), 
                                                   format = "%Y-%m-%d %H:%M:%S")

testTimes$consump_end_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert,
                                                        " ", testTimes$consump_end_time_hr, 
                                                        ":", testTimes$consump_end_time_min, 
                                                        ":", "00"), 
                                                 format = "%Y-%m-%d %H:%M:%S")

testTimes$post_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert,
                                                            " ", testTimes$post_safetyscan_time_hr,
                                                            ":", testTimes$post_safetyscan_time_min,
                                                            ":", "00"), 
                                                     format = "%Y-%m-%d %H:%M:%S")

testTimes$postConsumpTimeToTest <- as.numeric(difftime(testTimes$post_safetyscan_time_convert,
                                                       testTimes$consump_end_time_convert,
                                                       units = "mins"))

testTimes$Post_PreTime <- as.numeric(difftime(testTimes$post_safetyscan_time_convert,
                                              testTimes$pre_safetyscan_time_convert,
                                              units = "mins"))

testTimes$remove <- ifelse(testTimes$subject_id == "001-056" & is.na(testTimes$postConsumpTimeToTest), 1, 0)
testTimes <- testTimes[testTimes$remove == 0, ]

# ## For subjects without a post Consumption to test time, we're using the time between pre and post test -- DO NOT "impute" this way -- JW 11/11/2022
# testTimes$postConsumpTimeToTest[is.na(testTimes$postConsumpTimeToTest)] <- testTimes$Post_PreTime[is.na(testTimes$postConsumpTimeToTest)]

pt.df <- merge(pt.df.w, testTimes[, c("subject_id", "postConsumpTimeToTest", "Post_PreTime")], 
               by = "subject_id")

ps <- merge(ps, testTimes[, c("subject_id", "postConsumpTimeToTest", "Post_PreTime")], 
               by = "subject_id")

non_user.id <- pt.df$subject_id[pt.df$user_cat == "non-user"]
vs <- read.xlsx(file.path(data_folder, ps_folder, 
                         "All CDPHE VAS Scores_30Nov2022.xlsx"), 
                sheet = "VAS")
### Data Dictionary
# "subject_id"
# "vas0_high_score" -- vas score for how high you feel at pre
# "vas0_score_dc" -- how confident you feel about driving at pre
# "vas1_high_score" -- vas score for how high you feel at post
# "vas1_score_dc" -- how confident you feel about driving at post

pt.df <- merge (pt.df, vs, 
                by = "subject_id", 
                all.x = T)
pt.df$vas_high_score_chg <- pt.df$vas1_high_score - pt.df$vas0_high_score
pt.df$vas_score_dc_chg <- pt.df$vas1_score_dc - pt.df$vas0_score_dc

Remove Outliers

## Remove known outliers
ps$outliers <- 0
# ps$outliers[ps$subject_id == "001-109" & ps$tp == "pre2"] <- 1 # Ben revised
ps$outliers[ps$subject_id == "001-060" & ps$tp == "post"] <- 1

ps <- ps[ps$outliers == 0, ]

Plot of PRE/POST for Right Eye

right.df <- ps[ps$eye == "Right", ]

ggplot(right.df, aes(x=frame/30, y = percent_change, group = subject_id))+
  geom_line(show.legend = FALSE, alpha = 0.5)+
  facet_grid(rows = vars(user_cat), cols = vars(tp))+
  labs(title = "Percent Change by Pre/Post and THC use category", 
       x = "seconds")+
  theme_bw()

1.2 Create Analytic Sample

Make sure datasets have the same participants and are truncated at frame 400

ps <- ps[ps$frame >= 0 & ps$frame <= 400, 
         c("subject_id", "tp", "eye", "frame", "percent_change")]

ps <- ps[order(ps$subject_id, ps$tp, ps$eye, ps$frame), ]
ps$frame_char <- str_pad(ps$frame, width = 3, side = c("left"), pad = "0")
ps$frame <- NULL

ps.w <- reshape(ps[, c("subject_id","tp", "eye", "frame_char", "percent_change")], 
                          timevar = "frame_char", 
                          idvar = c("subject_id", "tp", "eye"), 
                          direction = "wide")
var.names <-  c(names(ps.w)[1:3], sort(names(ps.w)[4:404]))

ps.w <- ps.w[, var.names]

id.names <- paste(ps.w$subject_id, ps.w$tp, ps.w$eye, sep = "_")
rownames(ps.w) <- id.names

missData <- apply(ps.w[, 4:404], MARGIN = 1, FUN = function(x) sum(is.na(x)))

missInterpolate <- function(x){
  z <- zoo(x, order.by = seq(0:400))
  y <- na.approx(z, maxgap = 3, na.rm = FALSE)
  
  return(as.numeric(y))
}

test <- apply(ps.w[, 4:404], 
              MARGIN = 1, 
              FUN = missInterpolate)

ps.w <- data.frame(t(test))

names(ps.w) <- var.names[4:404]

ps.w$subject_id <- substr(rownames(ps.w), 1, 7)
ps.w$tp <- ifelse(grepl("pre2", rownames(ps.w)) == T, "pre2", "post")
ps.w$eye <- ifelse(grepl("Left", rownames(ps.w)) == T, "Left", "Right")

ps.w <- ps.w[, var.names]

pctChg.names <- var.names[4:404]

ps <- reshape(ps.w,
              varying = pctChg.names, 
              v.names = "percent_change",
              timevar = "frame", 
              times = 0:400,
              idvar = c("subject_id", "tp", "eye"),
              direction = "long")

ps <- ps[order(ps$subject_id, ps$tp, ps$eye), ]
right_0.post <- ps[ps$tp == "post" & ps$eye == "Right", ]
post.ids <- unique(right_0.post$subject_id)

right_0.post.w <- ps.w[ps.w$tp == "post" & ps.w$eye == "Right", ]

right_0.pt <- reshape(ps[ps$eye == "Right", ], 
                      timevar = "tp", 
                      idvar = c("subject_id", "frame"), 
                      direction = "wide")

right_0.pt$wiPctChg <- right_0.pt$percent_change.post - right_0.pt$percent_change.pre2

right_0.pt <- right_0.pt[, c("subject_id", "frame", "wiPctChg")]

right_0.pt.w <- reshape(right_0.pt[, c("subject_id", "frame", "wiPctChg")], 
                     timevar = "frame", 
                     idvar = "subject_id", 
                     direction = "wide")

# remove rows where all data is missing (e.g. pt didn't have both pre and post)
allMissing <- rowSums(is.na(right_0.pt.w[, 2:402]))
rowsAllMissing <- names(allMissing)[allMissing == 401]

right_0.pt.w <- right_0.pt.w[!(rownames(right_0.pt.w) %in% rowsAllMissing), ]

################################# ----------------------------- Left eye
left_0.post <- ps[ps$tp == "post" & ps$eye == "Left", ]
left.post.ids <- unique(left_0.post$subject_id)

left_0.post.w <- ps.w[ps.w$tp == "post" & ps.w$eye == "Left", ]

left_0.pt <- reshape(ps[ps$eye == "Left", ], 
                      timevar = "tp", 
                      idvar = c("subject_id", "frame"), 
                      direction = "wide")

left_0.pt$wiPctChg <- left_0.pt$percent_change.post - left_0.pt$percent_change.pre2

left_0.pt <- left_0.pt[, c("subject_id", "frame", "wiPctChg")]

left_0.pt.w <- reshape(left_0.pt[, c("subject_id", "frame", "wiPctChg")], 
                     timevar = "frame", 
                     idvar = "subject_id", 
                     direction = "wide")

# remove rows where all data is missing (e.g. pt didn't have both pre and post)
allMissing <- rowSums(is.na(left_0.pt.w[, 2:402]))
rowsAllMissing <- names(allMissing)[allMissing == 401]

left_0.pt.w <- left_0.pt.w[!(rownames(left_0.pt.w) %in% rowsAllMissing), ]

## Find intersection of all participant files across data sets to define the analytic subjects

analytic.N <- intersect(unique(right_0.post$subject_id), unique(right_0.pt$subject_id))
analytic.N <- intersect(analytic.N, unique(right_0.post.w$subject_id))
analytic.N <- intersect(analytic.N, unique(right_0.pt.w$subject_id))

analytic_L.N <- intersect(unique(left_0.post.w$subject_id), unique(left_0.post$subject_id))
analytic_L.N <- intersect(analytic.N, unique(left_0.pt$subject_id))
analytic_L.N <- intersect(analytic.N, unique(left_0.pt.w$subject_id))

analytic_LR.N <- intersect(analytic.N, analytic_L.N)

## Filter all datasets to analytic sample

left_0.post <- left_0.post[left_0.post$subject_id %in% analytic_LR.N, ]
left_0.post$seconds <- left_0.post$frame/30
left_0.post.w <- left_0.post.w[left_0.post.w$subject_id %in% analytic_LR.N ,]
row.names(left_0.post.w) <- left_0.post.w$subject_id

left_0.post <- merge(left_0.post, pt.df[, c("subject_id", "user_cat")], 
                     by = "subject_id", 
                     all.x = T)
left_0.post.w <- merge(left_0.post.w, pt.df[, c("subject_id", "user_cat")], 
                     by = "subject_id", 
                     all.x = T)

rownames(left_0.post.w) <- left_0.post.w$subject_id

left_0.pt <- left_0.pt[left_0.pt$subject_id %in% analytic_LR.N, ]
left_0.pt$seconds <- left_0.pt$frame/30
left_0.pt.w <- left_0.pt.w[left_0.pt.w$subject_id %in% analytic_LR.N, ]
row.names(left_0.pt.w) <- left_0.pt.w$subject_id

left_0.pt <- merge(left_0.pt, pt.df[, c("subject_id", "user_cat")], 
                     by = "subject_id", 
                     all.x = T)
left_0.pt.w <- merge(left_0.pt.w, pt.df[, c("subject_id", "user_cat")], 
                     by = "subject_id", 
                     all.x = T)

rownames(left_0.pt.w) <- left_0.pt.w$subject_id

## Filter all datasets to analytic sample
right_0.post <- right_0.post[right_0.post$subject_id %in% analytic.N, ]
right_0.post$seconds <- right_0.post$frame/30
right_0.post.w <- right_0.post.w[right_0.post.w$subject_id %in% analytic.N ,]
row.names(right_0.post.w) <- right_0.post.w$subject_id

right_0.post <- merge(right_0.post, pt.df[, c("subject_id", "user_cat")], 
                     by = "subject_id", 
                     all.x = T)
right_0.post.w <- merge(right_0.post.w, pt.df[, c("subject_id", "user_cat")], 
                     by = "subject_id", 
                     all.x = T)

rownames(right_0.post.w) <- right_0.post.w$subject_id

right_0.pt <- right_0.pt[right_0.pt$subject_id %in% analytic.N, ]
right_0.pt$seconds <- right_0.pt$frame/30
right_0.pt.w <- right_0.pt.w[right_0.pt.w$subject_id %in% analytic.N, ]

right_0.pt <- merge(right_0.pt, pt.df[, c("subject_id", "user_cat")], 
                     by = "subject_id", 
                     all.x = T)
right_0.pt.w <- merge(right_0.pt.w, pt.df[, c("subject_id", "user_cat")], 
                     by = "subject_id", 
                     all.x = T)

rownames(right_0.pt.w) <- right_0.pt.w$subject_id

pt.analytic.df <- pt.df[pt.df$subject_id %in% analytic.N,]

post.right.ids <- unique(ps$subject_id[ps$tp == "post" & ps$eye == "Right"])
pre.right.ids <- unique(ps$subject_id[ps$tp == "pre2" & ps$eye == "Right"])

post.exclude.ids <- post.right.ids[!(post.right.ids %in% pre.right.ids)]

# ### Write Analytic Files
# 
# write.csv(pt.analytic.df, 
#          file.path(data_folder, "PupilLightResponse_AnalyticSample_Demog.csv"), 
#          row.names = F)
# 
# write.csv(right_0.post.w, 
#           file.path(data_folder, "PupilLightResponse_RightEye_Post_Wide.csv"), 
#           row.names = F)
# 
# write.csv(right_0.post, 
#           file.path(data_folder, "PupilLightResponse_RightEye_Post_Long.csv"), 
#           row.names = F)
for(i in post.exclude.ids){
  print(ggplot(ps[ps$subject_id == i & ps$tp == "post" & ps$eye == "Right", ], 
         aes(x = frame, y = percent_change))+
          geom_line())
}

#Figure Typical Pupil Light Response

# for(i in non_user.id){
#   print(ggplot(right_0.post[right_0.post$subject_id == i, ], 
#          aes(x = frame, y = percent_change))+
#         labs(x = i)+
#         geom_line())
# }

typical.df <- right_0.post[right_0.post$subject_id == "001-032", ]
typical.df$seconds <- typical.df$frame/30

typical_1 <- typical.df$percent_change[typical.df$seconds == 1]
typical_2 <- typical.df$percent_change[typical.df$seconds == 2]
typical_5 <- typical.df$percent_change[typical.df$seconds == 5]

min.constrict.x <- typical.df$seconds[typical.df$percent_change == min(typical.df$percent_change, na.rm = T)][1]
min.constrict.y <- min(typical.df$percent_change, na.rm = T)

typical.df$minConstrict <- ifelse(typical.df$seconds == min.constrict.x & 
                          typical.df$percent_change == min.constrict.y, 
                          "Point of \nminimal \nconstriction", "")

typical.df$minConstrict[typical.df$seconds == 2] <- "Percent \nchange \nat 2 seconds"
typical.df$minConstrict[typical.df$seconds == 5] <- "Percent \nchange \nat 5 seconds"

ggplot(typical.df, aes(x = seconds, y = percent_change))+
  geom_line()+
  geom_ribbon(data = typical.df[typical.df$seconds >= min.constrict.x, ], 
              aes(ymin = 0, ymax = percent_change), fill= "lightblue")+
  # geom_text(hjust = 0.001)+
  labs(y = "Percent change in pupil response",
       x = "Seconds from start of light test")+
  scale_x_continuous(expand = c(0,0), limits = c(0, 10), 
                     breaks = c(0,2,4,6,8,10))+
  scale_y_continuous(expand = c(0,0), limits = c(-30, 1))+
  # annotate("rect", xmin = min.constrict.x, xmax = 10,
  #          ymin = min.constrict.y-2, ymax = 1,
  #          alpha = 0.5, fill = "lightblue")+
  geom_point(x = min.constrict.x, 
             y = min.constrict.y, 
             colour = "darkblue", 
             size = 2)+
  geom_point(x = 5, 
             y = typical_5, 
             colour = "violet", 
             size = 2)+
  geom_point(x = 2, 
             y = typical_2, 
             colour = "violet", 
             size = 2)+
  geom_vline(xintercept = min.constrict.x, color = "darkblue", linetype ="dotted")+
  # annotate("text", x =min.constrict.x-0.25, 
  #          y = min.constrict.y-0.5,
  #          label = "Point of \nminimal \nconstriction", 
  #          hjust= 1)+
  geom_text_repel(aes(label = minConstrict), 
                   force = 50, 
                   max.overlaps = 50, 
                   nudge_x = 0.1)+

  annotate("text", x = 6.25, y = -1, 
           label = "Rebound dilation")

2 Table 1

No Consumption end time recorded for non-users

pt.df$postConsumpTimeToTest[pt.df$user_cat == "non-user"] <- 0

tab1 <- pt.df[pt.df$subject_id %in% analytic.N, c("user_cat", "demo_age", "demo_gender", "BMI", "thc.post", "postConsumpTimeToTest")]
tab1$demo_gender <- as.character(tab1$demo_gender)

table1.df <- tbl_summary(tab1,
                         by = user_cat,
                         statistic = list(
                           demo_age ~ "{mean} ({sd})",
                           BMI ~ "{mean} ({sd})",
                           thc.post ~ "{median} ({p25}, {p75})",
                           postConsumpTimeToTest ~ "{mean} ({sd})",
                           all_categorical() ~ "{n} ({p}%)"
                         ),
                         label = list(
                           demo_age ~ "Age (years)", 
                           demo_gender ~ "Sex", 
                           BMI ~ "Body Mass Index (kg/m^2)", 
                           thc.post ~ "THC, after cannabis smoking (ng/ml)", 
                           postConsumpTimeToTest ~ "Time Interval of pupillary measurements after initiation of cannabis smoking (mins)"
                         ),
                         digits = all_continuous() ~ 2, 
                         missing = "no") %>%
  modify_header(all_stat_cols() ~ "**{level}**\n(N = {n})") %>%
  add_overall(last = TRUE, col_label = "**Total**\n(N = {N})") %>%
  add_p() %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Cannabis Use Group**") %>%
  bold_labels()
table1.df
Characteristic Cannabis Use Group Overall, N = 841 p-value2
non-user (N = 29)1 occasional (N = 30)1 daily (N = 25)1
Age (years) 32.29 (4.70) 31.15 (4.75) 32.75 (5.71) 32.02 (5.02) 0.5
Sex 0.2
    Female 16 (55%) 10 (33%) 9 (36%) 35 (42%)
    Male 13 (45%) 20 (67%) 16 (64%) 49 (58%)
Body Mass Index (kg/m^2) 24.94 (4.72) 24.49 (3.96) 27.08 (4.26) 25.42 (4.41) 0.066
THC, after cannabis smoking (ng/ml) 0.00 (0.00, 0.00) 5.73 (3.73, 9.47) 17.84 (8.20, 42.42) 3.96 (0.00, 13.60) <0.001
Time Interval of pupillary measurements after initiation of cannabis smoking (mins) 0.00 (0.00) 63.93 (6.26) 60.16 (3.78) 40.74 (30.10) <0.001
1 Mean (SD); n (%); Median (IQR)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
# table1.df %>% as_flex_table() %>% flextable::save_as_docx(path = "../results/tabl1.docx")
# tab1_latex <- table1.df %>% as_gt() %>% gt::as_latex()

# table1.df %>% as_gt() %>% gt::gtsave(filename = "../results/tabl1.png")



pt.df$postConsumpTimeToTest[pt.df$user_cat == "non-user"] <- NA


# boxplot(demo_age ~ user_cat, data = tab1)
# m1 <- lm(log(demo_age) ~ user_cat, data = tab1)
# summary(m1)
# hist(resid(m1))
# 
# boxplot(BMI ~ user_cat, data = tab1)
# 
# m2 <- lm(BMI ~ user_cat, data = tab1)
# summary(m2)
# hist(resid(m2), breaks = 15)
# 
# plot(fitted(m2), resid(m2))

boxplot(pt.df$postConsumpTimeToTest)

hist(pt.df$postConsumpTimeToTest)

summary(pt.df$postConsumpTimeToTest)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   53.00   59.00   62.00   62.74   66.00   84.00      32

2.1 Post Data

post.user <- ggplot(right_0.post, aes(x=seconds, y = percent_change, group = subject_id))+
             geom_line(show.legend = FALSE, alpha = 0.5)+
             facet_grid(rows = vars(user_cat))+
             labs(title ="POST data percent change by THC use category")+
             theme_bw()

post.user

3 Analysis

3.1 POST DATA: Logistic Regression models to predict Smoker vs non-smoker using post data. plot AUC

3.2 SoFR for prediction of smoking

Another method that can be used to predict smoking status is a scalar-on-function regression model which uses differences between the whole trajectories to determine smoking status. However, when there are missing values (due to different test durations), all trajectories should be truncated to the shortest or the values should be imputed with fPCA. (See exploration of using SoFR line starting at 3000)

sofr_impute_df <- right_0.post.w[, pctChg_vars]
rownames(sofr_impute_df) <- rownames(right_0.post.w)

sofr_fpca <- fpca.face(as.matrix(sofr_impute_df))
sofr_fpca2 <- sofr_fpca$Yhat
names(sofr_fpca2) <- names(sofr_impute_df)

sofr_imputed <- sofr_impute_df
sofr_imputed[is.na(sofr_imputed)] <-  sofr_fpca2[is.na(sofr_imputed)]

ncols = ncol(sofr_imputed)

smk.df <- pt.analytic.df[, c("subject_id", "user_cat")]
smk.df$smoker <- ifelse(pt.analytic.df$user_cat %in% c("daily", "occasional"), 1, 0)

sind <- seq(0, 1, len = ncols)
smat <- matrix(sind, nrow(smk.df), ncols, byrow = T)

# smk.df$Zsm <- I(Zsm)
smk.df$Zraw <- I(as.matrix(sofr_imputed))
smk.df$smat <- I(smat)
smk.df$lmat <- I(matrix(1/ncols, nrow(smk.df), ncols))
smk.df$zlmat <- I(smk.df$lmat * smk.df$Zraw)

smk_fglm_ps2 <- gam(smoker ~ s(smat, by=zlmat, bs = "cr", k = 30), data=smk.df, 
                   method = "REML", family = binomial)
summary(smk_fglm_ps2)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## smoker ~ s(smat, by = zlmat, bs = "cr", k = 30)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   1.3731     0.5711   2.404   0.0162 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df Chi.sq p-value
## s(smat):zlmat 4.79  5.463  10.78   0.105
## 
## R-sq.(adj) =  0.117   Deviance explained = 13.1%
## -REML = 52.945  Scale est. = 1         n = 84
pt.sofr.roc <- roc(smk_fglm_ps2$model$smoker, smk_fglm_ps2$fitted.values)
pt.sofr.roc.auc <- auc(pt.sofr.roc)

auc.df <- data.frame("Label" = c("Post AUC Scalar Feat", "Post SoFR AUC"), 
                     "AUC" = c(post.auc.scalar, pt.sofr.roc.auc), 
                     stringsAsFactors = F)


sofr_auc <- data.frame("subject_id" = smk.df$subject_id, 
                       "smoker" = smk.df$smoker, 
                       "post_sofr_scores" = smk_fglm_ps2$fitted.values)

auc_inference <- merge(auc_inference, sofr_auc, 
                       by = c("subject_id", "smoker"))

3.2.1 Plot of SoFR with fPCA imputed data for missing values

df_sofr <- data.frame("smat" = seq(0, 1, length.out = ncol(smk.df$smat)),
                      "zlmat" = 1)
sofr_pred <- predict(smk_fglm_ps2, newdata = df_sofr, se.fit = TRUE, type = "iterms")
# sofr_pred2 <- predict(smk_fglm_ps2, newdata = df_sofr, se.fit = TRUE, type = "terms")
plot.sofr <- data.frame("frame" = 0:(ncol(smk.df$smat) - 1),
                            "f0" = sofr_pred$fit[, 1],
                            'f0.se' = sofr_pred$se.fit[, 1])

plot.sofr$lci <- exp(plot.sofr$f0 - 1.96*plot.sofr$f0.se)
plot.sofr$uci <- exp(plot.sofr$f0 + 1.96*plot.sofr$f0.se)
plot.sofr$OR <- exp(plot.sofr$f0)
plot.sofr$sec <- plot.sofr$frame/30
plot.sofr$p <- exp(plot.sofr$f0)/(1+exp(plot.sofr$f0))
sigRegion <- function(df, crit.val, imp.var){
  time <- imp.var[1]
  est <- imp.var[2]
  lci <- imp.var[3]
  uci <- imp.var[4]
  
  
  crit.rows <- which((df[, lci] < crit.val & df[, uci] < crit.val) |
                       (df[, lci] > crit.val & df[, uci] > crit.val))
  
  # print(crit.rows)
  
  group <- vector(mode = "numeric", length = length(crit.rows))
  grp.ind <- 1
  for(i in 1:length(crit.rows)){
    if(i == 1){
      group[i] <- grp.ind}
    else(if(i > 1){
      if(crit.rows[i] - crit.rows[i-1] == 1){
        group[i] <- grp.ind
      }
      else(if(crit.rows[i] - crit.rows[i-1] != 1){
        grp.ind <- grp.ind+1
        group[i] <- grp.ind
      })
    })
    
  }
  # print(group)
  
  num.grp <- unique(group)
  ind.df <- data.frame(group = NA, 
                       start = NA, 
                       end = NA)
  for(i in num.grp){
    ind.df[i,] <- c(i, min(crit.rows[group == i]), max(crit.rows[group == i]))
  }
  
  # print(ind.df)
  
  res.df <- data.frame(int.start = NA, 
                       int.end = NA, 
                       est.time = NA,
                       est = NA,
                       lci = NA,
                       uci = NA)
  # print(nrow(ind.df))
  
  for(i in 1:nrow(ind.df)){
    
    sub.df <- df[(ind.df$start[i]):(ind.df$end[i]), ]
    
    # print(sub.df[1, ])
    
    if(sub.df[1, lci] > crit.val & sub.df[1, uci] >crit.val){
      start1 <- round(sub.df[1, time], 2)
      end1 <- round(sub.df[nrow(sub.df), time], 2)
      or1 <- max(sub.df[ , est])
      peaktime1 <- round(sub.df[sub.df[, est] == or1, time], 2)
      lci1 <- round(sub.df[sub.df[, est] == or1, lci], 2)
      uci1 <- round(sub.df[sub.df[, est] == or1, uci], 2)
      or1 <- round(or1, 2)
      
      res.df[i, ] <- c(start1, end1, peaktime1, or1, lci1, uci1)
      # print(res.df)
      
    }
    if(sub.df[1, lci] < crit.val & sub.df[1, uci] < crit.val){
      start2 <- round(sub.df[1, time], 2)
      end2 <- round(sub.df[nrow(sub.df), time], 2)
      or2 <- min(sub.df[, est])
      peaktime2 <- round(sub.df[sub.df[, est] == or2, time], 2)
      lci2 <- round(sub.df[sub.df[, est] == or2, lci], 2)
      uci2 <- round(sub.df[sub.df[, est] == or2, uci], 2)
      or2 <- round(or2, 2)
      
      res.df[i, ] <- c(start2, end2, peaktime2, or2, lci2, uci2)
      # print(res.df)
    }
  }
  
  
  # crit.ind <- by(crit.rows, INDICES = list(group), FUN = function(x) c(min(x), max(x)))
  return(res.df)
}

3.2.2 Extract Significant Regions in SoFR analysis

sofr.sigRegions <- sigRegion(plot.sofr, 1, imp.var = c("sec", "OR", "lci", "uci"))

3.2.3 Plot SoFR Results

sofr_plot <- ggplot(data = plot.sofr, aes(x=frame/30, y = exp(f0)))+
  geom_line()+
  geom_line(aes(x=frame/30, y = exp(f0-1.96*f0.se)), linetype = 'longdash')+
  geom_line(aes(x=frame/30, y = exp(f0+1.96*f0.se)), linetype = 'longdash')+
  geom_segment(data=sofr.sigRegions, aes(x = int.start, y=1, xend= int.end, yend = 1), 
               color = "darkred", linewidth =1.2)+
  # geom_hline(yintercept = 1, color = "darkred", linetype = 1) +
  scale_x_continuous(expand = c(0, 0), limits = c(0,10), 
                     breaks = c(0,2,4,6,8,10))+
  scale_y_continuous(limits = c(0,8), 
                     breaks = c(0,1,2,3,4,6,8)) +
  labs(title = "", 
       x = "Seconds after start of light test", 
       y = "Odds ratio between smokers and non-smokers")+
  theme_bw()

# sofr_plot_prob <- ggplot(data = plot.sofr, aes(x=frame/30, y = p))+
#                   geom_line()+
#                   geom_line(aes(x=frame/30, y = exp(f0-1.96*f0.se)/(1+exp(f0-1.96*f0.se))), linetype = 'longdash')+
#                   geom_line(aes(x=frame/30, y = exp(f0+1.96*f0.se)/(1+exp(f0+1.96*f0.se))), linetype = 'longdash')+
#                   geom_segment(data=sofr.sigRegions, aes(x = int.start, y=0.50, xend= int.end, yend = 0.5),
#                                color = "darkred", linetype = "dotted", linewidth =1.2)+
#                 # geom_hline(yintercept = 1, color = "darkred", linetype = 1) +
#                   scale_x_continuous(expand = c(0, 0), limits = c(0,10))+
#                   ylim(0,1) +
#                   labs(title = "", 
#                        x = "Seconds after start of light test", 
#                        y = "Probability of being a smoker")+
#                   theme_bw()

sofr_plot

3.2.4 AUC Plot Comparing feature models and full trajectory models

# "Prediction of Cannabis Use with Single Summary Features and Full Trajectory Model after Cannabis Consumption"
# jpeg(file.path(res_folder, "AUC_4PC_ScalarFeatures.jpg"), 
#      height = 6, width = 8, res = 300, units = "in")
roc.col <- viridis(5)

# plot.roc(pt.scalar.roc, main = "", 
#          col = roc.col[2])
# plot.roc(pt.sofr.roc, main = "", add = T, col = roc.col[5])
# legend(0.7, 0.1, legend = c(paste0("Summary Features AUC: ",round(post.auc.scalar, 3)),
#                              paste0("Full Trajectory AUC: ", round(pt.sofr.roc.auc, 3))),
#        col = roc.col[c(2,5)], 
#        lty = rep(1, 2), cex = 0.8, bty="n")

  
auc_inference.l <- reshape(auc_inference, 
                           varying = c("post_SF_aucScores", "post_sofr_scores"), 
                           v.names = "prediction", 
                           timevar = "model", 
                           times = c("post_SF_aucScores", "post_sofr_scores"), 
                           direction = "long")
auc_inference.l$modelName <- ifelse(auc_inference.l$model == "post_SF_aucScores", 
                                    paste0("Traditional LogRegr AUC: ",round(post.auc.scalar, 3)), 
                                    paste0("Functional LogRegr AUC: ", round(pt.sofr.roc.auc, 3)))


plot.roc <- ggplot(data = auc_inference.l, aes(d = smoker, m = prediction, color = modelName))+
            geom_roc(n.cuts=0)+
            labs(x = "1-Specificity", 
                 y = "Sensitivity", 
                 color = "")+
            scale_color_manual(values = roc.col[c(5,2)]) +
            theme_bw()+
            theme(legend.position = c(0.6,0.2))
plot.roc

final.roc <- grid.arrange(plot.roc, sofr_plot, 
                          layout_matrix = cbind(1,2))

labeled.roc.plot <- as_ggplot(final.roc) +
                    draw_plot_label(label = c("A", "B"), size = 15, 
                                    x = c(0, 0.5), y = c(1, 1))

labeled.roc.plot

3.3 Compare AUC across models

auc.df$AUC <- round(as.numeric(auc.df$AUC), 3)

knitr::kable(auc.df)
Label AUC
Post AUC Scalar Feat 0.675
Post SoFR AUC 0.713

AUC Inference

Vr_10 <- function(nN, srpi, srnj){
  Vr_10i <- vector(mode = "numeric", length = length(srpi))
  
  for(i in 1:length(srpi)){
    Vr_10i[i] = (1/nN)*(sum(srpi[i] > srnj)+ 0.5*sum(srpi[i] ==  srnj))
  }
  
  return(Vr_10i)
}

Vr_01 <- function(pN, srpi, srnj){
  Vr_01i <- vector(mode = "numeric", length = length(srnj))
  
  for(i in 1:length(srnj)){
    Vr_01i[i] = (1/pN)*(sum(srnj[i] < srpi)+ 0.5*sum(srnj[i] ==  srpi))
  }
  
  return(Vr_01i)
}

w10 <- function(nP, v10_r, auc_r, v10_s, auc_s){
  diff_r <- vector(mode = "numeric", length = length(v10_r))
  diff_s <- vector(mode = "numeric", length = length(v10_s))
  
  for(i in 1:length(v10_r)){
    diff_r[i] <- v10_r[i] - auc_r
    diff_s[i] <- v10_s[i] - auc_r
  }
  
  res <- (nP-1)^(-1)* sum(diff_r * diff_s)
  
  return(res)
}

w01 <- function(nN, v01_r, auc_r, v01_s, auc_s){
  diff_r <- vector(mode = "numeric", length = length(v01_r))
  diff_s <- vector(mode = "numeric", length = length(v01_s))
  
  for(i in 1:length(v01_r)){
    diff_r[i] <- v01_r[i] - auc_r
    diff_s[i] <- v01_s[i] - auc_r
  }
  res <- (nN-1)^(-1)* sum(diff_r * diff_s)
  return(res)
}

w_rs <- function(nP, nN, srpi_r, srnj_r, srpi_s, srnj_s, auc_r, auc_s){
  v10_r <-  Vr_10(nN, srpi_r, srnj_r)
  v01_r <-  Vr_01(pN=nP, srpi_r, srnj_r)
  
  v10_s <-  Vr_10(nN, srpi_s, srnj_s)
  v01_s <-  Vr_01(pN=nP, srpi_s, srnj_s)
  
  w10_rs <- w10(nP, v10_r, auc_r, v10_s, auc_s)
  w01_rs <- w01(nN, v01_r, auc_r, v01_s, auc_s)
  
  
  res <-  (nP)^(-1)*w10_rs + (nN)^(-1)*w01_rs
  return(res)
}

w_postSF_postSF <- w_rs(nP = sum(auc_inference$smoker == 1),
                        nN = sum(auc_inference$smoker == 0), 
                        srpi_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
                        srnj_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
                        srpi_s = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
                        srnj_s = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
                        auc_r = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"],
                        auc_s = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"])

w_postSOFR_postSF <- w_rs(nP = sum(auc_inference$smoker == 1),
                        nN = sum(auc_inference$smoker == 0), 
                        srpi_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
                        srnj_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
                        srpi_s = auc_inference$post_sofr_scores[auc_inference$smoker == 1],
                        srnj_s = auc_inference$post_sofr_scores[auc_inference$smoker == 0],
                        auc_r = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"],
                        auc_s = auc.df$AUC[auc.df$Label == "Post AUC SoFR"])


w_postSOFR_postSOFR <- w_rs(nP = sum(auc_inference$smoker == 1),
                        nN = sum(auc_inference$smoker == 0), 
                        srpi_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
                        srnj_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
                        srpi_s = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
                        srnj_s = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
                        auc_r = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"],
                        auc_s = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"]) 

z_postSF_postSOFR <- (auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"] - auc.df$AUC[auc.df$Label == "Post SoFR AUC"])/
  (sqrt(w_postSF_postSF +  w_postSOFR_postSOFR - w_postSOFR_postSF))

auc_compare <- data.frame("Test Desciption" = "AUC-postSF v AUC-postSoFR", 
                          "z" = round(z_postSF_postSOFR, 3))

auc_compare$p <- round((1-pnorm(abs(auc_compare$z)))*2, 3)

knitr::kable(auc_compare)
Test.Desciption z p
AUC-postSF v AUC-postSoFR -0.53 0.596

4 Function on Scalar Regression

4.1 Post

\[ \begin{aligned} Y_{i}(t) &= f_0(t) + f_1(t)*I(user = occasional) + f_2(t)*I(user = daily) + b_i(t) + \epsilon_i(t)\\ & b_i(t) \sim GP(0, \Sigma_b) \\ & \epsilon_i(t) \sim N(0, \sigma_{\epsilon}^2) \\ &= \sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{1k}\phi_{1k}(t_i)*I(user = occasional) + \sum_{k=1}^K \xi_{2k}\phi_{2k}(t_i)*I(user = daily) + \sum_{k=1}^K \xi_{ik}\phi_k(t_i) + \epsilon_i(t)\\ \end{aligned} \]

4.1.1 Mean function for non-user

\[ \begin{aligned} Y_{i}(t) &= f_0(t)\\ &=\sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) \\ &= \Phi_0\xi_0 \end{aligned} \]

4.1.2 Mean function for occasional user

\[ \begin{aligned} Y_{i}(t) &= f_0(t) + f_1(t) \\ &=\sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{1k}\phi_{1k}(t_i)\\ &= \Phi_0\xi_0 + \Phi_1\xi_1\\ &= [ \Phi_0, \Phi_1 ] \xi \end{aligned} \]

4.1.3 Mean function for daily user

\[ \begin{aligned} Y_{i}(t) &= f_0(t) + f_2(t) \\ &=\sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{2k}\phi_{2k}(t_i) \\ &= \Phi_0\xi_0 + \Phi_2\xi_2\\ &= [ \Phi_0, \Phi_2 ] \xi \end{aligned} \]

right_0.gam.df <- merge(right_0.post, pt.analytic.df, 
                        by = c("subject_id", "user_cat"))

right_0.gam.df$sind <- right_0.gam.df$frame/400

m.post_gam <- mgcv::gam(percent_change ~ s(sind, k=30, bs="cr") + 
                          s(sind, by=occasional, k=30, bs = "cr") + 
                          s(sind, by=daily, k=30, bs = "cr"), 
                  data = right_0.gam.df, method = "REML")

## Create a matrix of residuals
post_gam.resid <- cbind(right_0.gam.df[!(is.na(right_0.gam.df$percent_change)), 
                                       c("subject_id", "frame")], 
                        m.post_gam$residuals)
names(post_gam.resid) <- c("subject_id", "frame", "resid")

resid_mat <- reshape(post_gam.resid[, c("subject_id", "frame", "resid")], 
                     timevar = "frame", 
                     idvar = "subject_id", 
                     direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL

resid_mat <- as.matrix(resid_mat)

post_gam.fpca <- fpca.face(resid_mat, knots = 15)

#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1

right_0.gam.df <- merge(right_0.gam.df, Phi_mat, 
                        by = "frame", 
                        all.x = T)
right_0.gam.df <- right_0.gam.df[order(right_0.gam.df$subject_id, right_0.gam.df$frame), ]
right_0.gam.df$subject_id <- as.factor(right_0.gam.df$subject_id)

post_gam.fri <- mgcv::bam(percent_change ~ 
                            s(sind, k=30, bs="cr") + 
                            s(sind, by=occasional, k=30, bs = "cr") + 
                            s(sind, by=daily, k=30, bs = "cr") +
                            s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
                            s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"), 
                          method = "fREML", data = right_0.gam.df, discrete = TRUE)


summary(post_gam.fri)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = occasional, 
##     k = 30, bs = "cr") + s(sind, by = daily, k = 30, bs = "cr") + 
##     s(subject_id, by = Phi1, bs = "re") + s(subject_id, by = Phi2, 
##     bs = "re") + s(subject_id, by = Phi3, bs = "re") + s(subject_id, 
##     by = Phi4, bs = "re")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -10.3125     0.9197  -11.21   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                      edf Ref.df        F p-value    
## s(sind)            27.54  28.43   115.67  <2e-16 ***
## s(sind):occasional 18.63  21.95    70.14  <2e-16 ***
## s(sind):daily      18.62  21.95    35.42  <2e-16 ***
## s(subject_id):Phi1 82.23  84.00 23472.92  <2e-16 ***
## s(subject_id):Phi2 81.68  84.00  2775.02  <2e-16 ***
## s(subject_id):Phi3 82.23  84.00   887.87  <2e-16 ***
## s(subject_id):Phi4 81.05  84.00  1139.47  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.966   Deviance explained = 96.6%
## fREML =  63334  Scale est. = 2.6314    n = 32642

4.1.4 Plotting trajectory of occasional & daily user

post_gam.coef <- post_gam.fri$coefficients
post_gam.cov <- vcov.gam(post_gam.fri)

df_pred_non <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 0, "daily" = 0, 
                            "Phi1" = 0, "Phi2" = 0, "Phi3" = 0, 
                            "Phi4" = 0, 
                          "subject_id" = right_0.gam.df$subject_id[1])

lpmat_non <- predict(post_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")


df_pred_occ <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 1, "daily" = 0, 
                            "Phi1" = 0, "Phi2" = 0, "Phi3" = 0, 
                            "Phi4" = 0, 
                          "subject_id" = right_0.gam.df$subject_id[1])

lpmat_occ <- predict(post_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "lpmatrix")

df_pred_dly <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 0, "daily" = 1, 
                            "Phi1" = 0, "Phi2" = 0, "Phi3" = 0, 
                            "Phi4" = 0, 
                          "subject_id" = right_0.gam.df$subject_id[1])

lpmat_dly <- predict(post_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "lpmatrix")


pred_df_user <- data.frame(seconds = rep(seq(0, 400), 3)/30,
                      user = c(rep("no use", 401), rep("occasional use", 401),rep("daily use", 401)), 
                      mean = c(lpmat_non %*% post_gam.coef, 
                               lpmat_occ %*% post_gam.coef, lpmat_dly %*% post_gam.coef), 
                      se = c(sqrt(diag(lpmat_non %*% post_gam.cov %*% t(lpmat_non))), 
                             sqrt(diag(lpmat_occ %*% post_gam.cov %*% t(lpmat_occ))),
                             sqrt(diag(lpmat_dly %*% post_gam.cov %*% t(lpmat_dly)))), 
                      stringsAsFactors = F)

post_userProfile <- ggplot(data = pred_df_user, aes(x = seconds, y = mean, group = user, col = user))+
                    geom_line()+
                    labs(y = "Percent change in pupil response", 
                         x = "Seconds from start of light test", 
                         colour = "Cannabis Use")+
                    theme_bw()
post_userProfile

legend_userProfile <- g_legend(post_userProfile)
pred_f1 <- predict(post_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "terms")
pred_f2 <- predict(post_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "terms")

pred_coef_df <- data.frame(seconds = seq(0, 400)/30,
                           f0_hat = lpmat_non %*% post_gam.coef, 
                           f0_se = sqrt(diag(lpmat_non %*% post_gam.cov %*% t(lpmat_non))), 
                           f1_hat = pred_f1$fit[, 2], 
                           f1_se = pred_f1$se.fit[, 2], 
                           f2_hat = pred_f2$fit[, 3], 
                           f2_se = pred_f2$se.fit[, 3])

pred_coef_df$f1_lci <- pred_coef_df$f1_hat - 2*pred_coef_df$f1_se
pred_coef_df$f1_uci <- pred_coef_df$f1_hat + 2*pred_coef_df$f1_se

pred_coef_df$f2_lci <- pred_coef_df$f2_hat - 2*pred_coef_df$f2_se
pred_coef_df$f2_uci <- pred_coef_df$f2_hat + 2*pred_coef_df$f2_se


occ_non.sigRegion <- sigRegion(pred_coef_df, crit.val = 0, imp.var = c("seconds", "f1_hat", "f1_lci", "f1_uci"))
dly_non.sigRegion <- sigRegion(pred_coef_df, crit.val = 0, imp.var = c("seconds", "f2_hat", "f2_lci", "f2_uci"))


post_non_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f0_hat))+
                geom_line()+
                geom_line(aes(x = seconds, y = f0_hat + 2*f0_se), linetype = "longdash")+
                geom_line(aes(x = seconds, y = f0_hat - 2*f0_se), linetype = "longdash")+
                scale_x_continuous(expand = c(0, 0), limits = c(0,10))+
                scale_y_continuous(limits = c(-7, 7))+
                labs(title = "Average Percent Change of a Non-user (Post)", ylab = "Percent Change")+
                theme_bw()

post_occ_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f1_hat))+
                geom_line()+
                geom_line(aes(x = seconds, y = f1_hat + 2*f1_se), linetype = "longdash")+
                geom_line(aes(x = seconds, y = f1_hat - 2*f1_se), linetype = "longdash")+
                # geom_line(aes(x = seconds, y = 0), col = "darkred")+
                geom_segment(data=occ_non.sigRegion, aes(x = int.start, y=0, xend= int.end, yend = 0),
                                color = "darkred", linewidth =1.2)+
                  
                labs(title = "Occasional use vs No use", 
                     y= "", 
                     x = "")+
                theme_bw()+
                scale_x_continuous(expand = c(0, 0), limits = c(0,10), 
                                   breaks = c(0,2,4,6, 8, 10))+
                scale_y_continuous(limits = c(-7, 7), breaks = c(-6,-4,-2,0,2,4,6))+
                theme(rect = element_rect(fill = "transparent"))

post_dly_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f2_hat))+
                geom_line()+
                geom_line(aes(x = seconds, y = f2_hat + 2*f2_se), linetype = "longdash")+
                geom_line(aes(x = seconds, y = f2_hat - 2*f2_se), linetype = "longdash")+
                # geom_line(aes(x = seconds, y = 0), col = "darkred")+
                geom_segment(data=dly_non.sigRegion, aes(x = int.start, y=0, xend= int.end, yend = 0),
                                color = "darkred", linewidth =1.2)+
                labs(title = "Daily use vs No use", 
                     y = "", 
                     x = "")+
                theme_bw()+
                scale_x_continuous(expand = c(0, 0), limits = c(0,10), 
                                   breaks = c(0,2,4,6, 8, 10))+
                scale_y_continuous(limits = c(-7, 7), breaks = c(-6,-4,-2,0,2,4,6))+
                theme(rect = element_rect(fill = "transparent"))

ylab <- textGrob(label = "Difference in percent change in pupil response", rot = 90, 
                 gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("Less Constriction in User", width = 15, simplify = F),
                                  paste, collapse = "\n"), rot = 0, 
                 gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("More Constriction in User", width = 15, simplify = F),
                                  paste, collapse = "\n"), rot = 0, 
                 gp = gpar(fontsize = 8))

post_occ_coef <- grid.arrange(ylab, posval, negval, post_occ_plt, 
                              ncol = 2, nrow = 2, 
                              layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))

post_dly_coef <- grid.arrange(ylab, posval, negval, post_dly_plt, 
                              ncol = 2, nrow = 2, 
                              layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))

Plot difference between daily and occasional user

f2_f1 <- (lpmat_dly - lpmat_occ) %*% post_gam.coef
f2_f1.se <- sqrt(diag((lpmat_dly - lpmat_occ) %*% post_gam.cov %*% t((lpmat_dly - lpmat_occ))))

f2_f1_diff_df <- data.frame(seconds = seq(0, 400)/30, 
                            f2f1_diff = f2_f1, 
                            f2f1_se = f2_f1.se)

post_dlyocc_plt <- ggplot(data = f2_f1_diff_df, aes(x=seconds, y = f2f1_diff))+
                   geom_line()+
                   geom_line(aes(x = seconds, y = f2f1_diff + 2*f2f1_se), linetype = "longdash")+
                   geom_line(aes(x = seconds, y = f2f1_diff - 2*f2f1_se), linetype = "longdash")+
                   # geom_line(aes(x = seconds, y = 0), col = "darkred")+
                   labs(title = "Daily use vs Occasional use", 
                        y = "", 
                        x = "Seconds from start of light test")+
                   theme_bw()+
                   scale_x_continuous(expand = c(0, 0), limits = c(0,10), 
                                      breaks = c(0,2,4,6, 8, 10))+
                   scale_y_continuous(limits = c(-7, 7), breaks = c(-6,-4,-2,0,2,4,6))+
                   theme(rect = element_rect(fill = "transparent"))

ylab <- textGrob(label = "Difference in percent change in pupil response", rot = 90, 
                 gp = gpar(fontsize = 12))
posval_do <- textGrob(label = sapply(strwrap("Less Constriction in Daily User", width = 15, 
                                          simplify = F),
                                  paste, collapse = "\n"), rot = 0, 
                 gp = gpar(fontsize = 8))
negval_do <- textGrob(label = sapply(strwrap("More Constriction in Daily User", width = 15, 
                                          simplify = F),
                                  paste, collapse = "\n"), rot = 0, 
                 gp = gpar(fontsize = 8))

post_dlyocc_coef <- grid.arrange(ylab, posval_do, negval_do, post_dlyocc_plt, 
                                 ncol = 2, nrow = 2, 
                                 layout_matrix = rbind(c(1, 2, rep(4, 15)), 
                                                       c(1, 3, rep(4, 15))))

post_dlyocc_coef
## TableGrob (2 x 17) "arrange": 4 grobs
##   z         cells    name                grob
## 1 1 ( 1- 2, 1- 1) arrange text[GRID.text.983]
## 2 2 ( 1- 1, 2- 2) arrange text[GRID.text.984]
## 3 3 ( 2- 2, 2- 2) arrange text[GRID.text.985]
## 4 4 ( 1- 2, 3-17) arrange      gtable[layout]

Panel Graphic of Differences

g1 <- grid.arrange(posval, negval, post_occ_plt, 
                                 ncol = 2, nrow = 2, 
                                 layout_matrix = rbind(c(1, rep(3, 9)), 
                                                       c(2, rep(3, 9))))

g2 <- grid.arrange(posval, negval, post_dly_plt, 
                                 ncol = 2, nrow = 2, 
                                 layout_matrix = rbind(c(1, rep(3, 9)), 
                                                       c(2, rep(3, 9))))

g3 <- grid.arrange(posval_do, negval_do, post_dlyocc_plt, 
                                 ncol = 2, nrow = 2, 
                                 layout_matrix = rbind(c(1, rep(3, 9)), 
                                                       c(2, rep(3, 9))))

fosr_contrasts <- grid.arrange(ylab, g1, g2, g3,
             ncol = 2, nrow = 18,
             layout_matrix = rbind(c(1, rep(2, 15)),
                                   c(1, rep(2, 15)),
                                   c(1, rep(2, 15)),
                                   c(1, rep(2, 15)),
                                   c(1, rep(2, 15)),
                                   c(1, rep(3, 15)),
                                   c(1, rep(3, 15)),
                                   c(1, rep(3, 15)),
                                   c(1, rep(3, 15)),
                                   c(1, rep(3, 15)),
                                   c(1, rep(4, 15)), 
                                   c(1, rep(4, 15)),
                                   c(1, rep(4, 15)), 
                                   c(1, rep(4, 15)),
                                   c(1, rep(4, 15))))

fosr_contrasts
## TableGrob (15 x 16) "arrange": 4 grobs
##   z         cells    name                grob
## 1 1 ( 1-15, 1- 1) arrange text[GRID.text.983]
## 2 2 ( 1- 5, 2-16) arrange     gtable[arrange]
## 3 3 ( 6-10, 2-16) arrange     gtable[arrange]
## 4 4 (11-15, 2-16) arrange     gtable[arrange]

4.1.5 Check for differences between smoker vs non-smoker

smoker.gam.df <- merge(right_0.post, pt.analytic.df, 
                        by = c("subject_id", "user_cat"))
smoker.gam.df$sind <- smoker.gam.df$frame/400
smoker.gam.df$smoker <- ifelse(smoker.gam.df$user_cat == "non-user", 0, 1)

m.post_smoker_gam <- mgcv::gam(percent_change ~ s(sind, k=30, bs="cr") + 
                                 s(sind, by=smoker, k=30, bs = "cr"), 
                  data = smoker.gam.df, method = "REML")

## Create a matrix of residuals
post_smoker_gam.resid <- cbind(smoker.gam.df[!(is.na(smoker.gam.df$percent_change)), 
                                             c("subject_id", "frame")], 
                               m.post_smoker_gam$residuals)
names(post_smoker_gam.resid) <- c("subject_id", "frame", "resid")

# post_smoker_gam.resid$frame_char <- str_pad(post_smoker_gam.resid$frame, 
#                                             width = 3, side = "left", pad = "0")

resid_mat <- reshape(post_smoker_gam.resid[, c("subject_id", "frame", "resid")], 
                     timevar = "frame", 
                     idvar = "subject_id", 
                     direction = "wide")

rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
# resid_names <- names(resid_mat)
# resid_names <- sort(resid_names)
# resid_mat <- resid_mat[, resid_names]

resid_mat <- as.matrix(resid_mat)


post_smoker_gam.fpca <- fpca.face(resid_mat, knots = 15)

#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_smoker_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_smoker_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1

smoker.gam.df <- merge(smoker.gam.df, Phi_mat, 
                        by = "frame", 
                        all.x = T)
smoker.gam.df <- smoker.gam.df[order(smoker.gam.df$subject_id, smoker.gam.df$frame), ]
smoker.gam.df$subject_id <- as.factor(smoker.gam.df$subject_id)

post_smoker_gam.fri <- mgcv::bam(percent_change ~ 
                            s(sind, k=30, bs="cr") + 
                            s(sind, by=smoker, k=30, bs = "cr") +
                            s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
                            s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"), 
                          method = "fREML", data = smoker.gam.df, discrete = TRUE)


summary(post_smoker_gam.fri)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = smoker, 
##     k = 30, bs = "cr") + s(subject_id, by = Phi1, bs = "re") + 
##     s(subject_id, by = Phi2, bs = "re") + s(subject_id, by = Phi3, 
##     bs = "re") + s(subject_id, by = Phi4, bs = "re")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -10.3286     0.9645  -10.71   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                      edf Ref.df        F p-value    
## s(sind)            27.31  28.29   114.24  <2e-16 ***
## s(sind):smoker     19.49  22.78    66.85  <2e-16 ***
## s(subject_id):Phi1 82.65  84.00 24462.37  <2e-16 ***
## s(subject_id):Phi2 82.26  84.00  2954.85  <2e-16 ***
## s(subject_id):Phi3 82.56  84.00   943.48  <2e-16 ***
## s(subject_id):Phi4 81.70  84.00  1213.53  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.965   Deviance explained = 96.6%
## fREML =  63449  Scale est. = 2.6539    n = 32642

Plot difference between smoker and non-smoker

post_smoker_gam.coef <- post_smoker_gam.fri$coefficients
post_smoker_gam.cov <- vcov.gam(post_smoker_gam.fri)

df_pred_non <- data.frame("sind" = unique(smoker.gam.df$sind), "smoker" =0, 
                            "Phi1" = 0, "Phi2" = 0, "Phi3" = 0, 
                            "Phi4" = 0, 
                          "subject_id" = smoker.gam.df$subject_id[1])

lpmat_non <- predict(post_smoker_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")


df_pred_smk <- data.frame("sind" = unique(smoker.gam.df$sind), "smoker" = 1, 
                            "Phi1" = 0, "Phi2" = 0, "Phi3" = 0, 
                            "Phi4" = 0, 
                          "subject_id" = smoker.gam.df$subject_id[1])

lpmat_smk <- predict(post_smoker_gam.fri, newdata=df_pred_smk, se.fit=TRUE, type = "lpmatrix")

pred_df_smk <- data.frame(seconds = rep(seq(0, 400), 2)/30,
                      user = c(rep("no use", 401), rep("cannabis use", 401)), 
                      mean = c(lpmat_non %*% post_smoker_gam.coef, 
                               lpmat_smk %*% post_smoker_gam.coef), 
                      se = c(sqrt(diag(lpmat_non %*% post_smoker_gam.cov %*% t(lpmat_non))), 
                             sqrt(diag(lpmat_smk %*% post_smoker_gam.cov %*% t(lpmat_smk)))), 
                      stringsAsFactors = F)

post_smkProfile <- ggplot(data = pred_df_smk, aes(x = seconds, y = mean, group = user, col = user))+
                   geom_line()+
                   scale_y_continuous(limits = c(min(pred_df_smk$mean - 2*pred_df_smk$se),
                                                 max(pred_df_smk$mean+2*pred_df_smk$se)))+
                   labs(y = "Percent Change")+
                   theme_bw()
post_smkProfile

legend_smkProfile <- g_legend(post_smkProfile)

Manuscript plot of mean trajectories for smokers and all user categories

user_col <- viridis(15)

post_allProfile <- ggplot(data = pred_df_user, aes(x = seconds, y = mean, group = user, col = as.factor(user)))+
                   geom_line()+
                   geom_line(data = pred_df_smk[pred_df_smk$user == "cannabis use", ],
                             aes(x = seconds, y = mean),
                             linetype = "longdash", col = user_col[12])+
                   scale_color_manual(values = c(user_col[c(2, 13, 11)]), 
                                      breaks = c("no use", "occasional use", "daily use"))+
                   scale_x_continuous(expand = c(0,0), limits = c(0, 10), 
                                      breaks = c(0,2,4,6, 8, 10))+
                   labs(y = "Percent change in pupil response", 
                        x = "Seconds from start of light test", 
                        colour = "")+
                   theme_bw()+
                   theme(legend.position = c(0.85,0.8), 
                         legend.text = element_text(size = 12))

post_allProfile

4.1.6 Manuscript Figure

final.fosr <- grid.arrange(post_allProfile, post_occ_coef, 
                           post_dly_coef, post_dlyocc_coef, 
                          layout_matrix = rbind(c(1,2),
                                                c(3,4)))

labeled.fosr.plot <- as_ggplot(final.fosr) +
                    draw_plot_label(label = c("A", "B", "C", "D"), size = 15, 
                                    x = c(0, 0.5, 0, 0.5), y = c(1, 1, 0.5, 0.5))

labeled.fosr.plot

4.2 Effect of Post Consumption time to test (PCTT) on trajectories of pupillary light reflex

pctt_hist <- ggplot(data= pt.analytic.df[pt.analytic.df$user_cat != "non-user", ], 
                    aes(x = postConsumpTimeToTest))+
              geom_histogram(binwidth = 1, fill = "darkgrey")+
              geom_vline(xintercept = mean(pt.analytic.df$postConsumpTimeToTest[pt.analytic.df$user != "non-user"], na.rm = T),
                         col = "darkred", linetype = "longdash")+
              scale_y_continuous(expand = c(0,0))+
              scale_x_continuous(breaks = c(53, 60, 65, 70, 77, 84))+
  labs(x= "Time delay from consumption to test (mins)", 
       y = "")+
  theme_bw()
pctt_hist

4.2.1 FoSR Post – Interaction with Post Consumption Time to Test (PCTT)

\[ \begin{aligned} Y_{i}(t) &= f_0(t) + f_1(t)*I(user = occasional) + f_2(t)*I(user = daily) +f_3(t)*I(user = occasional)*PCTT + f_4(t)*I(user = daily)*PCTT + b_i(t) + \epsilon_i(t)\\ & b_i(t) \sim GP(0, \Sigma_b) \\ & \epsilon_i(t) \sim N(0, \sigma_{\epsilon}^2) \\ &= \sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{1k}\phi_{1k}(t_i)*I(user = occasional) + \sum_{k=1}^K \xi_{2k}\phi_{2k}(t_i)*I(user = daily)\\ &+ \sum_{k=1}^K \xi_{3k}\phi_{3k}(t_i)*I(user = occasional)*PCTT + \sum_{k=1}^K \xi_{4k}\phi_{4k}(t_i)*I(user = daily)*PCTT + \sum_{k=1}^K \xi_{ik}\phi_k(t_i) + \epsilon_i(t)\\ \end{aligned} \]

pt.analytic.df$postConsumpTimeToTest_c <- pt.analytic.df$postConsumpTimeToTest- round(mean(pt.analytic.df$postConsumpTimeToTest[pt.analytic.df$user_cat != "non-user"], na.rm = T), 3)

summary(pt.analytic.df$postConsumpTimeToTest_c)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
## -9.218000 -3.218000 -0.218000  0.000182  2.782000 21.782000        29
by(data = pt.analytic.df$postConsumpTimeToTest_c, INDICES = pt.analytic.df$user_cat, FUN = summary)
## pt.analytic.df$user_cat: non-user
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      NA      NA      NA     NaN      NA      NA      29 
## ------------------------------------------------------------ 
## pt.analytic.df$user_cat: occasional
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -8.218  -0.968   1.282   1.715   4.782  21.782 
## ------------------------------------------------------------ 
## pt.analytic.df$user_cat: daily
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -9.218  -4.218  -2.218  -2.058  -0.218   4.782
pt.analytic.df$occ_PCTT <- pt.analytic.df$occasional*pt.analytic.df$postConsumpTimeToTest_c
pt.analytic.df$dly_PCTT <- pt.analytic.df$daily*pt.analytic.df$postConsumpTimeToTest_c
pt.analytic.df$occ_PCTT[pt.analytic.df$user == "non-user"] <- 0
pt.analytic.df$dly_PCTT[pt.analytic.df$user == "non-user"] <- 0

post_intPCTT.gam.df <- merge(right_0.post, pt.analytic.df,
                             by = c("subject_id", "user_cat"))

# right_0.gam.df <- right_0.gam.df[order(right_0.gam.df$subject_id, right_0.gam.df$frame), ]
post_intPCTT.gam.df$sind <- post_intPCTT.gam.df$frame/400

# head(right_0.gam.df)

m.post_intPCTT_gam <- mgcv::bam(percent_change ~ s(sind, k=30, bs="cr") +
                                  s(sind, by=occasional, k=30, bs = "cr") +
                                  s(sind, by=daily, k=30, bs = "cr") +
                                  s(sind, by=occ_PCTT, k=30, bs = "cr") + 
                                  s(sind, by=dly_PCTT, k=30, bs = "cr"), 
                                data = post_intPCTT.gam.df, method = "REML")
summary(m.post_intPCTT_gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = occasional, 
##     k = 30, bs = "cr") + s(sind, by = daily, k = 30, bs = "cr") + 
##     s(sind, by = occ_PCTT, k = 30, bs = "cr") + s(sind, by = dly_PCTT, 
##     k = 30, bs = "cr")
## 
## Parametric coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -12.25464    0.07029  -174.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                       edf Ref.df      F p-value    
## s(sind)            22.077  25.48 201.94  <2e-16 ***
## s(sind):occasional 10.206  12.42  47.48  <2e-16 ***
## s(sind):daily       9.719  11.83  24.03  <2e-16 ***
## s(sind):occ_PCTT   11.555  14.08  65.76  <2e-16 ***
## s(sind):dly_PCTT    8.630  10.51  12.11  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.267   Deviance explained = 26.8%
## -REML = 1.1209e+05  Scale est. = 55.955    n = 32642
## Create a matrix of residuals
post_intPCTT_gam.resid <- cbind(post_intPCTT.gam.df[!(is.na(post_intPCTT.gam.df$percent_change)), 
                                       c("subject_id", "frame")],
                                m.post_intPCTT_gam$residuals)
names(post_intPCTT_gam.resid) <- c("subject_id", "frame", "resid")
# post_gam.resid$frame_char <- str_pad(post_gam.resid$frame, width = 3, side = "left", pad = "0")

resid_mat <- reshape(post_intPCTT_gam.resid[, c("subject_id", "frame", "resid")], 
                     timevar = "frame", 
                     idvar = "subject_id", 
                     direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
# resid_names <- names(resid_mat)
# resid_names <- sort(resid_names)
# resid_mat <- resid_mat[, resid_names]

resid_mat <- as.matrix(resid_mat)

post_intPCTT_gam.fpca <- fpca.face(resid_mat, knots = 15)

#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1

post_intPCTT.gam.df <- merge(post_intPCTT.gam.df, Phi_mat, 
                        by = "frame", 
                        all.x = T)
post_intPCTT.gam.df <- post_intPCTT.gam.df[order(post_intPCTT.gam.df$subject_id, post_intPCTT.gam.df$frame), ]
post_intPCTT.gam.df$subject_id <- as.factor(post_intPCTT.gam.df$subject_id)

post_intPCTT_gam.fri <- mgcv::bam(percent_change ~ 
                            s(sind, k=30, bs="cr") +
                            s(sind, by=occasional, k=30, bs = "cr") +
                            s(sind, by=daily, k=30, bs = "cr") +
                            s(sind, by=occ_PCTT, k=30, bs = "cr") + 
                            s(sind, by=dly_PCTT, k=30, bs = "cr")+
                            s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
                            s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"), 
                          method = "fREML", data = post_intPCTT.gam.df, discrete = TRUE)


summary(post_intPCTT_gam.fri)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = occasional, 
##     k = 30, bs = "cr") + s(sind, by = daily, k = 30, bs = "cr") + 
##     s(sind, by = occ_PCTT, k = 30, bs = "cr") + s(sind, by = dly_PCTT, 
##     k = 30, bs = "cr") + s(subject_id, by = Phi1, bs = "re") + 
##     s(subject_id, by = Phi2, bs = "re") + s(subject_id, by = Phi3, 
##     bs = "re") + s(subject_id, by = Phi4, bs = "re")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -10.1111     0.9229  -10.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                      edf Ref.df        F p-value    
## s(sind)            27.53  28.42   119.05  <2e-16 ***
## s(sind):occasional 18.76  22.06    77.86  <2e-16 ***
## s(sind):daily      18.16  21.43    38.47  <2e-16 ***
## s(sind):occ_PCTT   21.94  25.24    28.26  <2e-16 ***
## s(sind):dly_PCTT   19.38  22.77    17.82  <2e-16 ***
## s(subject_id):Phi1 81.24  84.00 22076.91  <2e-16 ***
## s(subject_id):Phi2 80.29  84.00  2495.67  <2e-16 ***
## s(subject_id):Phi3 81.38  84.00   787.75  <2e-16 ***
## s(subject_id):Phi4 79.63  84.00  1027.58  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.967   Deviance explained = 96.7%
## fREML =  62846  Scale est. = 2.5436    n = 32642
pt.analytic.df$smoker <- ifelse(pt.analytic.df$user_cat == "non-user", 0, 1)

pt.analytic.df$smoke_PCTT <- pt.analytic.df$smoker*pt.analytic.df$postConsumpTimeToTest_c
pt.analytic.df$smoke_PCTT[pt.analytic.df$user == "non-user"] <- 0


by(data = pt.analytic.df$postConsumpTimeToTest_c, INDICES = pt.analytic.df$smoker, FUN = summary)
## pt.analytic.df$smoker: 0
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      NA      NA      NA     NaN      NA      NA      29 
## ------------------------------------------------------------ 
## pt.analytic.df$smoker: 1
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -9.218000 -3.218000 -0.218000  0.000182  2.782000 21.782000
post_intPCTT_smoke.gam.df <- merge(right_0.post, pt.analytic.df,
                             by = c("subject_id", "user_cat", "eye"))

# right_0.gam.df <- right_0.gam.df[order(right_0.gam.df$subject_id, right_0.gam.df$frame), ]
post_intPCTT_smoke.gam.df$sind <- post_intPCTT_smoke.gam.df$frame/400

# head(right_0.gam.df)

m.post_intPCTT_smoke_gam <- mgcv::bam(percent_change ~ s(sind, k=30, bs="cr") +
                                  s(sind, by=smoker, k=30, bs = "cr") +
                                  s(sind, by=smoke_PCTT, k=30, bs = "cr"), 
                                data = post_intPCTT_smoke.gam.df, method = "REML")
summary(m.post_intPCTT_smoke_gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = smoker, 
##     k = 30, bs = "cr") + s(sind, by = smoke_PCTT, k = 30, bs = "cr")
## 
## Parametric coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -12.26266    0.07114  -172.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                       edf Ref.df      F p-value    
## s(sind)            21.930  25.33 197.05  <2e-16 ***
## s(sind):smoker     10.136  12.31  29.67  <2e-16 ***
## s(sind):smoke_PCTT  9.679  11.80  28.37  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.249   Deviance explained =   25%
## -REML = 1.1246e+05  Scale est. = 57.308    n = 32642
## Create a matrix of residuals
post_intPCTT_smoke_gam.resid <- cbind(post_intPCTT_smoke.gam.df[!(is.na(post_intPCTT_smoke.gam.df$percent_change)), 
                                       c("subject_id", "frame")],
                                m.post_intPCTT_smoke_gam$residuals)
names(post_intPCTT_smoke_gam.resid) <- c("subject_id", "frame", "resid")
# post_gam.resid$frame_char <- str_pad(post_gam.resid$frame, width = 3, side = "left", pad = "0")

resid_mat <- reshape(post_intPCTT_smoke_gam.resid[, c("subject_id", "frame", "resid")], 
                     timevar = "frame", 
                     idvar = "subject_id", 
                     direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
# resid_names <- names(resid_mat)
# resid_names <- sort(resid_names)
# resid_mat <- resid_mat[, resid_names]

resid_mat <- as.matrix(resid_mat)

post_intPCTT_smoke_gam.fpca <- fpca.face(resid_mat, knots = 15)

#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_intPCTT_smoke_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_intPCTT_smoke_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1

post_intPCTT_smoke.gam.df <- merge(post_intPCTT_smoke.gam.df, Phi_mat, 
                        by = "frame", 
                        all.x = T)

post_intPCTT_smoke.gam.df <- post_intPCTT_smoke.gam.df[order(post_intPCTT_smoke.gam.df$subject_id,
                                                             post_intPCTT_smoke.gam.df$frame), ]
post_intPCTT_smoke.gam.df$subject_id <- as.factor(post_intPCTT_smoke.gam.df$subject_id)

post_intPCTT_smoke.gam.fri <- mgcv::bam(percent_change ~ 
                            s(sind, k=30, bs="cr") +
                            s(sind, by=smoker, k=30, bs = "cr") +
                            s(sind, by=smoke_PCTT, k=30, bs = "cr") +
                            s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
                            s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"), 
                          method = "fREML", data = post_intPCTT_smoke.gam.df, discrete = TRUE)


summary(post_intPCTT_smoke.gam.fri)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = smoker, 
##     k = 30, bs = "cr") + s(sind, by = smoke_PCTT, k = 30, bs = "cr") + 
##     s(subject_id, by = Phi1, bs = "re") + s(subject_id, by = Phi2, 
##     bs = "re") + s(subject_id, by = Phi3, bs = "re") + s(subject_id, 
##     by = Phi4, bs = "re")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -10.3191     0.9644   -10.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                      edf Ref.df        F p-value    
## s(sind)            27.31  28.28   119.67  <2e-16 ***
## s(sind):smoker     19.50  22.79    68.66  <2e-16 ***
## s(sind):smoke_PCTT 19.44  22.89    21.21  <2e-16 ***
## s(subject_id):Phi1 82.17  84.00 23297.74  <2e-16 ***
## s(subject_id):Phi2 81.60  84.00  2639.64  <2e-16 ***
## s(subject_id):Phi3 82.24  84.00   792.85  <2e-16 ***
## s(subject_id):Phi4 81.01  84.00  1141.93  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.966   Deviance explained = 96.6%
## fREML =  63220  Scale est. = 2.6117    n = 32642

4.3 Trajectories for smokers with different PCTT and non-smokers

# create plot categories based on the 25th, 50th and 75th percentile
plot_cat <- c(60, 65, 70) - round(mean(pt.analytic.df$postConsumpTimeToTest, na.rm = T), 3) 

post_intPCTT_smoke_gam.coef <- post_intPCTT_smoke.gam.fri$coefficients
post_intPCTT_smoke_gam.cov <- vcov.gam(post_intPCTT_smoke.gam.fri)

### -- Non-smoker
df_pred_non <- data.frame("sind" = unique(post_intPCTT_smoke.gam.df$sind), 
                          "smoker" = 0, 
                          "smoke_PCTT" = 0, 
                          "dly_PCTT" = 0,
                          "Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0, 
                          "subject_id" = post_intPCTT_smoke.gam.df$subject_id[1])

lpmat_non <- predict(post_intPCTT_smoke.gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")

### -- Smoker with median PCTT
df_pred_smoke50 <- data.frame("sind" = unique(post_intPCTT_smoke.gam.df$sind), 
                          "smoker" = 1, 
                          "smoke_PCTT" = plot_cat[2], 
                          "dly_PCTT" = 0,
                          "Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0, 
                          "subject_id" = post_intPCTT_smoke.gam.df$subject_id[1])

lpmat_smoke50 <- predict(post_intPCTT_smoke.gam.fri, newdata=df_pred_smoke50, se.fit=TRUE, type = "lpmatrix")

### -- Smoker with 25th%-ile PCTT
df_pred_smoke25 <- data.frame("sind" = unique(post_intPCTT_smoke.gam.df$sind), 
                          "smoker" = 1, 
                          "smoke_PCTT" = plot_cat[1], 
                          "dly_PCTT" = 0,
                          "Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0, 
                          "subject_id" = post_intPCTT_smoke.gam.df$subject_id[1])

lpmat_smoke25 <- predict(post_intPCTT_smoke.gam.fri, newdata=df_pred_smoke25, se.fit=TRUE, type = "lpmatrix")


### -- Smoker with 75th%-ile PCTT
df_pred_smoke75 <- data.frame("sind" = unique(post_intPCTT_smoke.gam.df$sind), 
                          "smoker" = 1, 
                          "smoke_PCTT" = plot_cat[3], 
                          "dly_PCTT" = 0,
                          "Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0, 
                          "subject_id" = post_intPCTT_smoke.gam.df$subject_id[1])

lpmat_smoke75 <- predict(post_intPCTT_smoke.gam.fri, newdata=df_pred_smoke75, se.fit=TRUE, type = "lpmatrix")


pred_df <- data.frame(seconds = rep(seq(0, 400), 4)/30,
                      user = c(rep("No use", 401), 
                               rep("Cannabis use (60 min)", 401), 
                               rep("Cannabis use (65 min)", 401),
                               rep("Cannabis use (70 min)", 401)
                               ), 
                      mean = c(lpmat_non %*% post_intPCTT_smoke_gam.coef, 
                               lpmat_smoke25 %*% post_intPCTT_smoke_gam.coef,
                               lpmat_smoke50 %*% post_intPCTT_smoke_gam.coef, 
                               lpmat_smoke75 %*% post_intPCTT_smoke_gam.coef
                               ), 
                      se = c(sqrt(diag(lpmat_non %*% post_intPCTT_smoke_gam.cov %*% t(lpmat_non))), 
                             sqrt(diag(lpmat_smoke25 %*% post_intPCTT_smoke_gam.cov %*% t(lpmat_smoke25))),
                             sqrt(diag(lpmat_smoke50 %*% post_intPCTT_smoke_gam.cov %*% t(lpmat_smoke50))),                              sqrt(diag(lpmat_smoke75 %*% post_intPCTT_smoke_gam.cov %*% t(lpmat_smoke75)))
                             ), 
                      stringsAsFactors = F)

post_smokerProfile <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
                    geom_line(linewidth = 1.001)+
                    labs(y = "Percent change in pupil response", 
                         x = "Seconds from start of light test", 
                         col = "")+
                    scale_x_continuous(expand = c(0, 0), limits = c(0,10), 
                                       breaks = c(0,2,4,6,8, 10))+
                    scale_color_manual(values = c(user_col[c(2, 9, 11, 13, 15)]), 
                                      breaks = c("No use", "Cannabis use (60 min)",
                                                 "Cannabis use (65 min)", "Cannabis use (70 min)"))+
                    theme_bw()+
                    theme(legend.position = c(0.6,0.8))

post_smokerProfile

final.pctt <- grid.arrange(pctt_hist, post_smokerProfile, 
                          layout_matrix = cbind(1,2))

labeled.pctt.plot <- as_ggplot(final.pctt) +
                    draw_plot_label(label = c("A", "B"), size = 15, 
                                    x = c(0, 0.5), y = c(1, 1))

labeled.pctt.plot